Capítulo 2 FPP3

Author

Eddie Aguilar

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.0     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.0
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
✔ tsibble     1.1.3     ✔ fable       0.3.2
✔ tsibbledata 0.4.1     ✔ fabletools  0.3.2
✔ feasts      0.3.0     
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()    masks base::date()
✖ dplyr::filter()      masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval()  masks lubridate::interval()
✖ dplyr::lag()         masks stats::lag()
✖ tsibble::setdiff()   masks base::setdiff()
✖ tsibble::union()     masks base::union()

Book

# How to make a tsibble: 

# Build a tsibble directly: Year is the time index, one row per year.
y <- tsibble(
  Year = 2015:2019,
  Observation = c(123, 39, 78, 52, 110),
  index = Year
)
y
# A tsibble: 5 x 2 [1Y]
   Year Observation
  <int>       <dbl>
1  2015         123
2  2016          39
3  2017          78
4  2018          52
5  2019         110
# Out of a tibble:
# Same tsibble, built from a plain tibble and converted with as_tsibble().
# Use the native |> pipe for consistency with the rest of the file.
(y <- tibble(
  Year = 2015:2019,
  Observation = c(123, 39, 78, 52, 110)
) |>
  as_tsibble(index = Year))
# A tsibble: 5 x 2 [1Y]
   Year Observation
  <int>       <dbl>
1  2015         123
2  2016          39
3  2017          78
4  2018          52
5  2019         110

Using the ansett tsibble, we graph only the flights from Melbourne to Sydney in economy class. We can see several drops on Christmas Day, due to the lack of flights on that day of the year, and several values of 0 before 1990.

ansett
# A tsibble: 7,407 x 4 [1W]
# Key:       Airports, Class [30]
       Week Airports Class    Passengers
     <week> <chr>    <chr>         <dbl>
 1 1989 W28 ADL-PER  Business        193
 2 1989 W29 ADL-PER  Business        254
 3 1989 W30 ADL-PER  Business        185
 4 1989 W31 ADL-PER  Business        254
 5 1989 W32 ADL-PER  Business        191
 6 1989 W33 ADL-PER  Business        136
 7 1989 W34 ADL-PER  Business          0
 8 1989 W35 ADL-PER  Business          0
 9 1989 W36 ADL-PER  Business          0
10 1989 W37 ADL-PER  Business          0
# … with 7,397 more rows
# Weekly economy-class passengers (in thousands) on the Melbourne-Sydney route.
melsyd_economy <- ansett |>
  filter(Airports == "MEL-SYD", Class == "Economy") |>
  mutate(Passengers = Passengers / 1000)

melsyd_economy |>
  autoplot(Passengers) +
  labs(
    title = "Ansett airlines economy class",
    subtitle = "Melbourne-Sydney",
    y = "Passengers ('000)"
  )

Time series patterns

Trend: When there is a long-term increase or decrease in the data.

Seasonal: A series is influenced by seasonal factors.

Cyclic: Rises and falls in the data that are not of fixed period.

NOTE: Annual data cannot have seasonal pattern.

# US retail-trade employment since 1980, converted to millions of people.
# Fix: the y-axis label was misspelled ("Milllion"); also use |> consistently.
us_employment |>
  filter(Title == "Retail Trade", year(Month) >= 1980) |>
  autoplot(Employed / 1e3) +
  labs(y = "Million people", title = "Retail employment, USA")

We can see an overall increasing trend, an obvious seasonal pattern every summer in the US and finally a cycle every several years.

Seasonal plot

# Daily
# Seasonal plots of Victorian electricity demand at three different periods.

# Daily pattern (one line per day; legend suppressed, too many lines)
vic_elec |>
  gg_season(Demand, period = "day") +
  theme(legend.position = "none") +
  labs(y="MWh", title="Electricity demand: Victoria")

# Weekly pattern (one line per week)
vic_elec |>
  gg_season(Demand, period = "week") +
  theme(legend.position = "none") +
  labs(y="MWh", title="Electricity demand: Victoria")

# Annual pattern (one line per year)
vic_elec |>
  gg_season(Demand, period = "year") +
  labs(y="MWh", title="Electricity demand: Victoria")

Seasonal subseries plots

# Total holiday trips per quarter, aggregated to state level.
holidays <- tourism |>
  filter(Purpose == "Holiday") |>
  group_by(State) |>
  summarise(Trips = sum(Trips))

# Seasonal plot: one line per year for each state.
holidays |>
  gg_season(Trips) +
  labs(
    y = "Overnight trips ('000)",
    title = "Australian domestic holidays"
  )

# Subseries plot: one mini time series per quarter for each state.
holidays |>
  gg_subseries(Trips) +
  labs(
    y = "Overnight trips ('000)",
    title = "Australian domestic holidays"
  )

Correlation matrices

# Total trips per quarter by state (all purposes combined).
visitors <- tourism |>
  group_by(State) |>
  summarise(Trips = sum(Trips))

# One facet per state; free y-scales so each series stays readable.
visitors |>
  ggplot(aes(x = Quarter, y = Trips)) +
  geom_line() +
  facet_grid(vars(State), scales = "free_y") +
  labs(
    title = "Australian domestic tourism",
    y= "Overnight trips ('000)"
  )

# Scatterplot matrix: pairwise correlations between states.
visitors |>
  pivot_wider(values_from = Trips, names_from = State) |>
  GGally::ggpairs(columns = 2:9)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

Lag plots

Lag 1 graphs each quarter against the previous quarter.

Lag 2 graphs each quarter against 2 previous quarter.

And so on…

# Quarterly beer production since 2000, plotted against its own lags.
recent_production <- aus_production |>
  filter(year(Quarter) >= 2000)

recent_production |>
  gg_lag(Beer, geom = "point") +
  labs(x = "lag(Beer, k)")

We can see that lag 4 and lag 8 have a strong positive correlation; that is due to the seasonal pattern every year (summer).

Autocorrelation functions (ACF)

When having all the lags from the previous topic, we can calculate the autocorrelation of each lag, which is what we graphed in the lag plots, but now with values.

Once we have those values, we are going to be able to see the same patterns, strong negative correlation in lag 2, 6, 10, … and strong positive correlation in lag 4, 8, … That’s due to trend, seasonality and a combination of both.

Some rules:

  • When data have a trend, the autocorrelations for small lags tend to be large and positive.
  • When data are seasonal, the autocorrelations will be larger at the seasonal lags.
  • When data have both, you see a combination of these effects.
ACF(recent_production, Beer, lag_max = 9)
# A tsibble: 9 x 2 [1Q]
       lag      acf
  <cf_lag>    <dbl>
1       1Q -0.0530 
2       2Q -0.758  
3       3Q -0.0262 
4       4Q  0.802  
5       5Q -0.0775 
6       6Q -0.657  
7       7Q  0.00119
8       8Q  0.707  
9       9Q -0.0888 

We plot them:

# Plot the autocorrelation function of Beer with its 95% bounds.
recent_production |>
  ACF(Beer) |>
  autoplot() +
  labs(title="Australian beer production")

White noise

This occurs when there is no seasonality or trend, so the autocorrelations are close to 0 (everything is all over the place and seems random).

It’s uncorrelated, with zero mean and constant variance.

Let’s make some random graph:

# Simulate 50 draws of Gaussian white noise as a tsibble (seed fixed).
set.seed(30)
y <- tsibble(sample = 1:50, wn = rnorm(50), index = sample)

y |>
  autoplot(wn) +
  labs(title = "White noise", y = "")

# ACF of white noise: spikes should stay inside the significance bounds.
y |>
  ACF(wn) |>
  autoplot() +
  labs(title = "White noise")

As we can see, there are really no spikes out of the range.

That’s why the range (blue lines) is really important to graph: it marks the 95% critical values.

Exercises

1.

aus_production |> autoplot(Bricks)
Warning: Removed 20 rows containing missing values (`geom_line()`).

pelt |> autoplot(Lynx)

gafa_stock |> autoplot(Close)

vic_elec |> autoplot(Demand)

2.

# Peak closing price (and the date it occurred) for each stock.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  select(Symbol, Date, Close)
# A tsibble: 4 x 3 [!]
# Key:       Symbol [4]
# Groups:    Symbol [4]
  Symbol Date       Close
  <chr>  <date>     <dbl>
1 AAPL   2018-10-03  232.
2 AMZN   2018-09-04 2040.
3 FB     2018-07-25  218.
4 GOOG   2018-07-26 1268.

3.

tute1 <- readr::read_csv("Excels/tute1.csv")
Rows: 100 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): Sales, AdBudget, GDP
date (1): Quarter

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the data in the interactive viewer (no effect in a rendered document).
View(tute1)
# Parse the Quarter strings into yearquarter values and build the tsibble.
(tute <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter))
# A tsibble: 100 x 4 [1Q]
   Quarter Sales AdBudget   GDP
     <qtr> <dbl>    <dbl> <dbl>
 1 1981 Q1 1020.     659.  252.
 2 1981 Q2  889.     589   291.
 3 1981 Q3  795      512.  291.
 4 1981 Q4 1004.     614.  292.
 5 1982 Q1 1058.     647.  279.
 6 1982 Q2  944.     602   254 
 7 1982 Q3  778.     531.  296.
 8 1982 Q4  932.     608.  272.
 9 1983 Q1  996.     638.  260.
10 1983 Q2  908.     582.  280.
# … with 90 more rows
# Long format so each variable (Sales, AdBudget, GDP) gets its own facet.
tute |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

4.

library("USgas")
# Convert us_total to a tsibble: annual index, one series per state.
# Fix: use left assignment (<-) instead of ->, per the tidyverse style guide.
(us_total <- us_total |>
  as_tsibble(index = year, key = state))
# A tsibble: 1,266 x 3 [1Y]
# Key:       state [53]
    year state        y
   <int> <chr>    <int>
 1  1997 Alabama 324158
 2  1998 Alabama 329134
 3  1999 Alabama 337270
 4  2000 Alabama 353614
 5  2001 Alabama 332693
 6  2002 Alabama 379343
 7  2003 Alabama 350345
 8  2004 Alabama 382367
 9  2005 Alabama 353156
10  2006 Alabama 391093
# … with 1,256 more rows
# Annual natural gas consumption for the six New England states.
us_total |>
  filter(state %in% c(
    "Maine", "Vermont", "New Hampshire",
    "Massachusetts", "Connecticut", "Rhode Island"
  )) |>
  ggplot(aes(x = year, y = y)) +
  geom_line(aes(color = state)) +
  labs(y = "Gas consumption", title = "Annual natural gas consumption")

5.

# Read the tourism spreadsheet and rebuild the tsibble from the book.
tourism <- readxl::read_excel("Excels/tourism.xlsx")
# Fix: use left assignment (<-) rather than ->, per the tidyverse style guide.
(tourism <- tourism |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = c(Region, State, Purpose)))
# A tsibble: 24,320 x 5 [1Q]
# Key:       Region, State, Purpose [304]
   Quarter Region   State           Purpose  Trips
     <qtr> <chr>    <chr>           <chr>    <dbl>
 1 1998 Q1 Adelaide South Australia Business  135.
 2 1998 Q2 Adelaide South Australia Business  110.
 3 1998 Q3 Adelaide South Australia Business  166.
 4 1998 Q4 Adelaide South Australia Business  127.
 5 1999 Q1 Adelaide South Australia Business  137.
 6 1999 Q2 Adelaide South Australia Business  200.
 7 1999 Q3 Adelaide South Australia Business  169.
 8 1999 Q4 Adelaide South Australia Business  134.
 9 2000 Q1 Adelaide South Australia Business  154.
10 2000 Q2 Adelaide South Australia Business  169.
# … with 24,310 more rows
tsibble::tourism 
# A tsibble: 24,320 x 5 [1Q]
# Key:       Region, State, Purpose [304]
   Quarter Region   State           Purpose  Trips
     <qtr> <chr>    <chr>           <chr>    <dbl>
 1 1998 Q1 Adelaide South Australia Business  135.
 2 1998 Q2 Adelaide South Australia Business  110.
 3 1998 Q3 Adelaide South Australia Business  166.
 4 1998 Q4 Adelaide South Australia Business  127.
 5 1999 Q1 Adelaide South Australia Business  137.
 6 1999 Q2 Adelaide South Australia Business  200.
 7 1999 Q3 Adelaide South Australia Business  169.
 8 1999 Q4 Adelaide South Australia Business  134.
 9 2000 Q1 Adelaide South Australia Business  154.
10 2000 Q2 Adelaide South Australia Business  169.
# … with 24,310 more rows
# For every Region/Purpose pair, keep the quarter with the most trips,
# then sort so the overall maximum combination comes first.
tourism |>
  group_by(Region, Purpose) |>
  filter(Trips == max(Trips)) |>
  select(-State) |>
  arrange(desc(Trips))
# A tsibble: 304 x 5 [1Q]
# Key:       Region, State, Purpose [304]
# Groups:    Region, Purpose [304]
   Quarter Region          Purpose  Trips State          
     <qtr> <chr>           <chr>    <dbl> <chr>          
 1 2017 Q4 Melbourne       Visiting  985. Victoria       
 2 2001 Q4 Sydney          Business  948. New South Wales
 3 2016 Q4 Sydney          Visiting  921. New South Wales
 4 1998 Q1 South Coast     Holiday   915. New South Wales
 5 2016 Q1 North Coast NSW Holiday   906. New South Wales
 6 1998 Q1 Sydney          Holiday   828. New South Wales
 7 2017 Q4 Melbourne       Holiday   806. Victoria       
 8 2016 Q4 Brisbane        Visiting  796. Queensland     
 9 2002 Q1 Gold Coast      Holiday   711. Queensland     
10 2017 Q3 Melbourne       Business  704. Victoria       
# … with 294 more rows
# Melbourne, Visiting
# Total trips per quarter, aggregated to state level.
(state_tourism <- tourism |>
   group_by(State) |>
   summarise(total_Trips = sum(Trips)))
# A tsibble: 640 x 3 [1Q]
# Key:       State [8]
   State Quarter total_Trips
   <chr>   <qtr>       <dbl>
 1 ACT   1998 Q1        551.
 2 ACT   1998 Q2        416.
 3 ACT   1998 Q3        436.
 4 ACT   1998 Q4        450.
 5 ACT   1999 Q1        379.
 6 ACT   1999 Q2        558.
 7 ACT   1999 Q3        449.
 8 ACT   1999 Q4        595.
 9 ACT   2000 Q1        600.
10 ACT   2000 Q2        557.
# … with 630 more rows

6.

aus_arrivals
# A tsibble: 508 x 3 [1Q]
# Key:       Origin [4]
   Quarter Origin Arrivals
     <qtr> <chr>     <int>
 1 1981 Q1 Japan     14763
 2 1981 Q2 Japan      9321
 3 1981 Q3 Japan     10166
 4 1981 Q4 Japan     19509
 5 1982 Q1 Japan     17117
 6 1982 Q2 Japan     10617
 7 1982 Q3 Japan     11737
 8 1982 Q4 Japan     20961
 9 1983 Q1 Japan     20671
10 1983 Q2 Japan     12235
# … with 498 more rows
autoplot(aus_arrivals)
Plot variable not specified, automatically selected `.vars = Arrivals`

gg_season(aus_arrivals)
Plot variable not specified, automatically selected `y = Arrivals`

gg_subseries(aus_arrivals)
Plot variable not specified, automatically selected `y = Arrivals`

As we can see, all 4 different origins have a seasonal pattern, some more than others, with the UK being the one with the most oscillation every year and the US the least. This means there are certain events or weather patterns between the UK and Australia that make people travel to Australia depending on the time of the year.

We can also see that Japan had an increasing trend until 1996; all the others present a slight or more visible increasing trend all along.

In the seasonal plot we can confirm all the previous observations made about seasonality, and can also see where the seasonality comes from (quarters of the year) for each country.

7.

# Draw one retail series at random (seed fixed for reproducibility).
set.seed(352)
retail <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

retail
# A tsibble: 441 x 5 [1M]
# Key:       State, Industry [1]
   State    Industry                  `Series ID`    Month Turnover
   <chr>    <chr>                     <chr>          <mth>    <dbl>
 1 Victoria Household goods retailing A3349643V   1982 Apr     173.
 2 Victoria Household goods retailing A3349643V   1982 May     180.
 3 Victoria Household goods retailing A3349643V   1982 Jun     167.
 4 Victoria Household goods retailing A3349643V   1982 Jul     174.
 5 Victoria Household goods retailing A3349643V   1982 Aug     178.
 6 Victoria Household goods retailing A3349643V   1982 Sep     180.
 7 Victoria Household goods retailing A3349643V   1982 Oct     190.
 8 Victoria Household goods retailing A3349643V   1982 Nov     224.
 9 Victoria Household goods retailing A3349643V   1982 Dec     321.
10 Victoria Household goods retailing A3349643V   1983 Jan     179.
# … with 431 more rows
# Full battery of exploratory plots for the sampled retail series.
retail |> autoplot(Turnover)

retail |> gg_season(Turnover)

retail |> gg_subseries(Turnover)

retail |> gg_lag(Turnover, geom = "point")

retail |> ACF(Turnover) |> autoplot()

As we can see in our initial plot, we have Victoria’s retail turnover separated by month. We can notice an obvious upwards trend, which is probably a little bit exponential by the end of the graph; we can also see a seasonality which grows as the amount of turnover increases (proportional).

In the seasonal plot we can spot where the spikes of seasonality are, which are due to holidays, we can also see a more irregular spike through the years in summer break.

In the seasonal subseries plot we can confirm the proportion between spikes and amount of turnovers with december being the month with more turnovers but also, having an increase in turnovers throughout the years.

Due to the combination of strong seasonality and upwards trend, all lags between several months have a very strong autocorrelation.

We can confirm when we plot the autocorrelation function, we can see how every value is way above the range, due to strong upwards trend, and how there a few spikes every 12 months, due to seasonality.

8.

(us_employment <- us_employment |> filter(Title == "Total Private"))
# A tsibble: 969 x 4 [1M]
# Key:       Series_ID [1]
      Month Series_ID     Title         Employed
      <mth> <chr>         <chr>            <dbl>
 1 1939 Jan CEU0500000001 Total Private    25338
 2 1939 Feb CEU0500000001 Total Private    25447
 3 1939 Mar CEU0500000001 Total Private    25833
 4 1939 Apr CEU0500000001 Total Private    25801
 5 1939 May CEU0500000001 Total Private    26113
 6 1939 Jun CEU0500000001 Total Private    26485
 7 1939 Jul CEU0500000001 Total Private    26481
 8 1939 Aug CEU0500000001 Total Private    26848
 9 1939 Sep CEU0500000001 Total Private    27468
10 1939 Oct CEU0500000001 Total Private    27830
# … with 959 more rows
# Exploratory plots for the "Total Private" employment series.
us_employment |> autoplot(Employed)

us_employment |> gg_season(Employed)

us_employment |> gg_subseries(Employed)

us_employment |> gg_lag(Employed, geom = "point")

us_employment |> ACF(Employed) |> autoplot()

Strong upwards trend, very small seasonality; we can confirm this in the seasonal plot. Though there is a slight drop in the trend around 2010, which we can see in the seasonal subseries plot, it has strong positive correlation due to seasonality and trend. We can see how the ACF is being affected by the strong trend, and there is not much difference at multiples of 12 (annual) due to the small seasonality.

aus_production
# A tsibble: 218 x 7 [1Q]
   Quarter  Beer Tobacco Bricks Cement Electricity   Gas
     <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
 1 1956 Q1   284    5225    189    465        3923     5
 2 1956 Q2   213    5178    204    532        4436     6
 3 1956 Q3   227    5297    208    561        4806     7
 4 1956 Q4   308    5681    197    570        4418     6
 5 1957 Q1   262    5577    187    529        4339     5
 6 1957 Q2   228    5651    214    604        4811     7
 7 1957 Q3   236    5317    227    603        5259     7
 8 1957 Q4   320    6152    222    582        4735     6
 9 1958 Q1   272    5758    199    554        4608     5
10 1958 Q2   233    5641    229    620        5196     7
# … with 208 more rows
autoplot(aus_production, Bricks)
Warning: Removed 20 rows containing missing values (`geom_line()`).

gg_season(aus_production, Bricks)
Warning: Removed 20 rows containing missing values (`geom_line()`).

gg_subseries(aus_production, Bricks)
Warning: Removed 5 rows containing missing values (`geom_line()`).

gg_lag(aus_production, Bricks, geom = "point")
Warning: Removed 20 rows containing missing values (gg_lag).

autoplot(ACF(aus_production, Bricks))

We can see an increasing trend in the first half, then two important drops, and after that it stayed very regular from a general perspective, with a seasonality which varies a little bit every time. Looking at the seasonal plot we can confirm the highs and lows in the second half and how the seasonality stops being simple compared to the first half. Due to the loss of a strong trend and a not very defined seasonality, the correlations at the lags are not very strong. But we can also see that there is a seasonal pattern every year across the quarters.

# Rebuild pelt as a tsibble with Year as the index.
# Fix: use <- for assignment (not =) and the native |> pipe, per style guide.
(pelt <- pelt |> as_tsibble(index = Year))
# A tsibble: 91 x 3 [1Y]
    Year  Hare  Lynx
   <dbl> <dbl> <dbl>
 1  1845 19580 30090
 2  1846 19600 45150
 3  1847 19610 49150
 4  1848 11990 39520
 5  1849 28040 21230
 6  1850 58000  8420
 7  1851 74600  5560
 8  1852 75090  5080
 9  1853 88480 10170
10  1854 61280 19600
# … with 81 more rows
pelt |> autoplot(Hare)

pelt |> gg_lag(Hare, geom = "point")

pelt |> ACF(Hare) |> autoplot()

This data is annual, so it’s unfair to say it has seasonality throughout the years due to the lack of information within each year. Nevertheless, we can try to analyze this data, and looking at the line plot and ACF plot, we can notice a cycle roughly every 10 years, with a whole cycle consisting of a drop and a spike.

# Monthly total cost for the H02 ATC2 group: collapse the other keys by
# summing, then rebuild the tsibble keyed on ATC2 alone.
(PB <- tsibbledata::PBS |>
   as_tibble() |>
   group_by(Month, ATC2) |>
   summarise(Cost = sum(Cost)) |>
   as_tsibble(index = Month, key = ATC2) |>
   filter(ATC2 == "H02"))
`summarise()` has grouped output by 'Month'. You can override using the
`.groups` argument.
# A tsibble: 204 x 3 [1M]
# Key:       ATC2 [1]
# Groups:    @ Month [204]
      Month ATC2    Cost
      <mth> <chr>  <dbl>
 1 1991 Jul H02   429795
 2 1991 Aug H02   400906
 3 1991 Sep H02   432159
 4 1991 Oct H02   492543
 5 1991 Nov H02   502369
 6 1991 Dec H02   602652
 7 1992 Jan H02   660119
 8 1992 Feb H02   336220
 9 1992 Mar H02   351348
10 1992 Apr H02   379808
# … with 194 more rows
PB |> autoplot(Cost)

PB |> gg_season(Cost)

PB |> gg_subseries(Cost)

PB |> gg_lag(Cost, geom = "point")

PB |> ACF(Cost) |> autoplot()

We can see a clear and very strong seasonality, with a big drop in January and a recovery process over the whole year until it ends at the same point, all with a general upwards trend. In the seasonal plot we can see how the drop is very clean and spontaneous, and how over the whole year it recovers with very different highs and lows. We can see in the ACF how the annual seasonality is very strong, and all the other correlations are very weak due to very different values throughout the year.

us_gasoline
# A tsibble: 1,355 x 2 [1W]
       Week Barrels
     <week>   <dbl>
 1 1991 W06    6.62
 2 1991 W07    6.43
 3 1991 W08    6.58
 4 1991 W09    7.22
 5 1991 W10    6.88
 6 1991 W11    6.95
 7 1991 W12    7.33
 8 1991 W13    6.78
 9 1991 W14    7.50
10 1991 W15    6.92
# … with 1,345 more rows
us_gasoline |> autoplot(Barrels)

us_gasoline |> gg_season(Barrels)

us_gasoline |> gg_subseries(Barrels)

us_gasoline |> gg_lag(Barrels, geom = "point")

us_gasoline |> ACF(Barrels) |> autoplot()

We can see a general upwards trend which slightly stops and changes at the end of the graph; we can see a lot of variation in the seasonal plot due to the weekly data. Strong correlations due to seasonality and trend.

9.

2A, 3D, 1B, 4C

10.

# Victorian pig slaughter counts, restricted to 1990-1995.
aus_live <- aus_livestock |>
  filter(
    year(Month) >= 1990 & year(Month) <= 1995,
    State == "Victoria",
    Animal == "Pigs"
  )
aus_live |> autoplot(Count)

We can see a very obvious change: when we filter the data, we see a strong upwards trend. In the whole picture and life of the data, this stretch can actually be part of a cycle instead of a general trend, which we reveal when we use all the data. The filtering also makes the range of the ACF bigger and closer to the data in the filtered version.

11.

# GOOG closes for 2018 WITHOUT re-indexing, kept for comparison: the tsibble
# stays irregular (calendar dates skip weekends), so ACF later warns and
# computes by observation.
# Fix: use <- (not ->) and a single pipe style, per the tidyverse style guide.
(dgoog1 <- gafa_stock |>
  filter(Symbol == "GOOG", year(Date) >= 2018) |>
  mutate(trading_day = row_number()) |>
  mutate(diff = difference(Close)))
# A tsibble: 251 x 10 [!]
# Key:       Symbol [1]
   Symbol Date        Open  High   Low Close Adj_Close  Volume trading_…¹   diff
   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>      <int>  <dbl>
 1 GOOG   2018-01-02 1048. 1067. 1045. 1065      1065  1237600          1 NA    
 2 GOOG   2018-01-03 1064. 1086. 1063. 1082.     1082. 1430200          2 17.5  
 3 GOOG   2018-01-04 1088  1094. 1084. 1086.     1086. 1004600          3  3.92 
 4 GOOG   2018-01-05 1094  1104. 1092  1102.     1102. 1279100          4 15.8  
 5 GOOG   2018-01-08 1102. 1111. 1102. 1107.     1107. 1047600          5  4.71 
 6 GOOG   2018-01-09 1109. 1111. 1101. 1106.     1106.  902500          6 -0.680
 7 GOOG   2018-01-10 1097. 1105. 1096. 1103.     1103. 1042800          7 -3.65 
 8 GOOG   2018-01-11 1106. 1107. 1100. 1106.     1106.  978300          8  2.91 
 9 GOOG   2018-01-12 1102. 1124. 1101. 1122.     1122. 1720500          9 16.7  
10 GOOG   2018-01-16 1133. 1140. 1118. 1122.     1122. 1575300         10 -0.5  
# … with 241 more rows, and abbreviated variable name ¹​trading_day
# Re-index by trading day so the tsibble becomes regular ([1] interval):
# calendar dates skip weekends/holidays, so Date itself is irregular.
# NOTE(review): difference() comes after update_tsibble(), presumably so the
# differences align with the regular index — confirm before reordering.
(dgoog <- gafa_stock |>
  filter(Symbol == "GOOG", year(Date) >= 2018) |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE) |>
  mutate(diff = difference(Close)))
# A tsibble: 251 x 10 [1]
# Key:       Symbol [1]
   Symbol Date        Open  High   Low Close Adj_Close  Volume trading_…¹   diff
   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>      <int>  <dbl>
 1 GOOG   2018-01-02 1048. 1067. 1045. 1065      1065  1237600          1 NA    
 2 GOOG   2018-01-03 1064. 1086. 1063. 1082.     1082. 1430200          2 17.5  
 3 GOOG   2018-01-04 1088  1094. 1084. 1086.     1086. 1004600          3  3.92 
 4 GOOG   2018-01-05 1094  1104. 1092  1102.     1102. 1279100          4 15.8  
 5 GOOG   2018-01-08 1102. 1111. 1102. 1107.     1107. 1047600          5  4.71 
 6 GOOG   2018-01-09 1109. 1111. 1101. 1106.     1106.  902500          6 -0.680
 7 GOOG   2018-01-10 1097. 1105. 1096. 1103.     1103. 1042800          7 -3.65 
 8 GOOG   2018-01-11 1106. 1107. 1100. 1106.     1106.  978300          8  2.91 
 9 GOOG   2018-01-12 1102. 1124. 1101. 1122.     1122. 1720500          9 16.7  
10 GOOG   2018-01-16 1133. 1140. 1118. 1122.     1122. 1575300         10 -0.5  
# … with 241 more rows, and abbreviated variable name ¹​trading_day

Because a stock doesn’t have a continuous index (there is no data on weekends), we change the index to the trading-day number so the data becomes regular. We can see this change in the line plot, but we cannot really see a difference in the ACF.

# Regular (trading-day) index:
dgoog |> autoplot(Close)

dgoog |> ACF(Close) |> autoplot()

# Irregular (calendar-date) index:
dgoog1 |> autoplot(Close)

dgoog1 |> ACF(Close) |> autoplot()
Warning: Provided data has an irregular interval, results should be treated
with caution. Computing ACF by observation.

In a general point, there is no real difference between these two, the correlations graphed are the same.